/*

1b_sim_use.do
(this do file is called from 01_make_data.do)

Purpose: discussed below
Inputs: sim_use
Outputs: sim-use-portfolio-level, sim-use-student-level
	
*/


/*
	Generate outcomes describing the way people use the simulator
	
	Input data:
		- Student x sim run x rank level
	Output:
		- Student level 
		
	Outcomes:
		- Length of initial app 
		- Number of simulations run
		- Number of different schools simulated
		- Number of schools that weren't in initial application simulated
		- 1(Any sim run with different schools than in initial app)
		- 1(Any sim run with a different order of schools than in initial app & constant length)
		- 1(Any sim run with a different N of schools than in initial app)
		- 1(Any sim run with a lower risk profile than initial app)
		- Risk difference between the minimal risk profile and the initial risk profile
		
*/

	* Import the raw simulator-use export (replaces the data in memory)
	insheet using "$sim_use", clear

	* Build a datetime for each simulation run: ts_created is split on "."
	* and only the first piece is parsed as year-month-day hour:minute:second;
	* anything after the first "." is discarded.
	split ts_created, parse(".")
	generate double simrun_ts = clock(ts_created1, "YMD hms")
	format %tc simrun_ts
	drop ts_create*

	* Order rows by student, run time, portfolio and rank
	sort studentid simrun_ts port_id rank
	
	* ---------------------------------------------------------------
	* Student x sim run x rank level
	* ---------------------------------------------------------------

	* Make sure default schools (priority == 2) are assigned to the last rank
	* of their portfolio for HS apps; the tabulation is printed for inspection.
	bysort studentid port_id: egen max_rank = max(rank)
	tabulate rank max_rank if priority == 2
	assert rank == max_rank if priority == 2

	* Portfolio length: highest rank among rows with priority != 2,
	* copied to every row of the portfolio
	bysort studentid port_id: egen app_length = max(cond(priority != 2, rank, .))

	* Portfolio meta data (created in simulator and order of simulations)
	gen sim_port = is_external == "False"
	label var sim_port "Portfolio created in simulator"

	* The earliest row of every student must not be a simulator-created portfolio
	bys studentid (simrun_ts): assert sim_port[1] == 0

	* First portfolio = smallest port_id of the student. Stored as double:
	* the default float type cannot hold integers above 16,777,216 exactly,
	* so a large port_id would be rounded and never match itself below.
	* NOTE(review): the assert above orders by simrun_ts while this uses
	* port_id; this assumes port_id increases over time - confirm.
	bys studentid: egen double first_port_id = min(port_id)
	gen first_port = port_id == first_port_id

	* Initial application: the first portfolio, provided it was not built in the simulator
	gen initial_app = first_port == 1 & sim_port == 0
	
	** Number of schools simulated

		* (a) In the initial app: rank the program ids of the initial-app rows
		*     (ties share a rank), take the largest rank and copy it to all
		*     rows of the student
		bysort studentid: egen N_schools_initialx = rank(programid) if initial_app == 1, track
		bysort studentid: egen N_schools_initial = max(N_schools_initialx)
		drop N_schools_initialx

		* (b) All schools: flag the first row of every student-program pair
		*     and add the flags up within student
		bysort studentid programid (simrun_ts): gen first_school_obs = (_n == 1)
		bysort studentid: egen N_schools_all = sum(first_school_obs)
		drop first_school_obs

		* (c) Not in initial app: difference between (b) and (a)
		gen N_schools_not_initial = N_schools_all - N_schools_initial
	
	
	* ---------------------------------------------------------------
	* Bring to student x sim run level
	* ---------------------------------------------------------------

	* One row per student-portfolio; the rank-specific variables become
	* numbered columns (suffix = rank)
	reshape wide programid priority market belief ratex uncond_ratex, ///
		i(studentid port_id) j(rank)

	* Position of each portfolio within the student, ordered by port_id
	bysort studentid (port_id): egen sim_rank = rank(port_id), track

	* Baseline outcomes (from initial smartchoice app)
	* Each baseline value is read from the initial_app row and copied to
	* every row of the same student.

		* Initial app length
		bys studentid: egen initial_app_length = mean(cond(initial_app == 1, app_length, .))

		* Initial choices for ranks 1 to 7. Stored as double: the default float
		* type rounds integers above 16,777,216, which would make a large
		* program id differ from its own baseline copy in later comparisons.
		* NOTE(review): ranks above 7, if present in the data, are not carried
		* over - confirm the maximum list length.
		forv k = 1/7 {
			bys studentid: egen double initial_programid`k' = mean(cond(initial_app == 1, programid`k', .))
		}

		* Initial risk, kept in double so it compares exactly with risk
		bys studentid: egen double initial_risk = mean(cond(initial_app == 1, risk, .))
	
	* Comparison: simulator use vs baseline

		** Number of simulations
		bys studentid: egen N_sims = max(sim_rank)

		** Sim with more apps
		* NOTE(review): missing counts as larger than any number, so a missing
		* app_length gives longer_app == 1 and a missing initial_app_length
		* gives shorter_app == 1 - confirm lengths are never missing.
		gen longer_app = app_length > initial_app_length

		** Sim with fewer apps
		gen shorter_app = app_length < initial_app_length

		** Sim with the same number of apps but not the identical ranked list
		** (different schools or a different order), ranks 1 to 7
		gen byte same_listx = 1
		forv k = 1/7 {
			quietly replace same_listx = 0 if programid`k' != initial_programid`k'
		}
		gen diff_schools = app_length == initial_app_length & same_listx == 0
		drop same_listx

		** Sim with any different (or additional) schools
		gen diff_any = longer_app == 1 | diff_schools == 1 | shorter_app == 1

		gen not_initial_app = initial_app == 0 & diff_any == 1

		** Sim with any lower risk profile
		* Compare only when both risks are known. Without the guard a missing
		* initial_risk would yield lower_risk == 1 (missing is treated as
		* larger than any number) while risk_diff is missing.
		gen lower_risk = risk < initial_risk if !missing(risk, initial_risk)
		gen risk_diff = (initial_risk - risk) / 100

		* Student-level summaries: maximum of each portfolio-level outcome
		foreach var in not_initial_app longer_app shorter_app diff_any diff_schools lower_risk risk_diff {
			bys studentid: egen sim_`var' = max(`var')
		}

		gen year = 2020
		save "$int/sim-use-portfolio-level", replace
		
	* ---------------------------------------------------------------
	* Bring data to student level
	* ---------------------------------------------------------------

	* Retain the student id, year, grade and the sim_ / N_ summaries;
	* the portfolio-level sim_port and sim_rank are removed first
	drop sim_port sim_rank
	keep studentid year grade sim_* N_*

	* Collapse identical rows and verify one row per student
	duplicates drop
	isid studentid

	save "$int/sim-use-student-level", replace